This document aims to provide further examples in how to use the hpgltools.
Note to self, the header has rmarkdown::pdf_document instead of html_document or html_vignette because it gets some bullcrap error ‘margins too large’…
Here are the commands I invoke to get ready to play with new data, including everything required to install hpgltools, the software it uses, and the fission data.
## These first 4 lines are not needed once hpgltools is installed.
## source("http://bioconductor.org/biocLite.R")
## biocLite("devtools")
## library(devtools)
## install_github("elsayed-lab/hpgltools")
library(hpgltools)
require.auto("fission")
## [1] 0
tt <- sm(library(fission))
tt <- data(fission)
All the work I do in Dr. El-Sayed’s lab makes some pretty hard assumptions about how data is stored. As a result, to use the fission data set I will do a little bit of shenanigans to match it to the expected format. Now that I have played a little with fission, I think its format is quite nice and am likely to have my experiment class instead be a SummarizedExperiment.
## Extract the meta data from the fission dataset
meta <- as.data.frame(fission@colData)
## Make conditions and batches
meta$condition <- paste(meta$strain, meta$minute, sep=".")
meta$batch <- meta$replicate
meta$sample.id <- rownames(meta)
## Grab the count data
fission_data <- fission@assays$data$counts
## This will make an experiment superclass called 'expt' and it contains
## an ExpressionSet along with any arbitrary additional information one might want to include.
## Along the way it writes a Rdata file which is by default called 'expt.Rdata'
fission_expt <- create_expt(metadata=meta, count_dataframe=fission_data)
## Bringing together the count matrix and gene information.
Travis wisely imposes a limit on the amount of time for building vignettes. My tools by default will attempt all possible pairwise comparisons, which takes a long time. Therefore I am going to take a subset of the data and limit these comparisons to that.
fun_data <- expt_subset(fission_expt, subset="condition=='wt.120'|condition=='wt.30'")
fun_norm <- sm(normalize_expt(fun_data, batch="limma", norm="quant", transform="log2", convert="cpm"))
limma_comparison <- sm(limma_pairwise(fun_data))
names(limma_comparison$all_tables)
## [1] "wt.30_vs_wt.120"
summary(limma_comparison$all_tables$wt.30_vs_wt.120)
## logFC AveExpr t P.Value
## Min. :-4.278 Min. :-4.58 Min. :-88.48 Min. :0.0000
## 1st Qu.:-0.399 1st Qu.: 1.11 1st Qu.: -2.60 1st Qu.:0.0192
## Median :-0.020 Median : 3.97 Median : -0.13 Median :0.1240
## Mean : 0.008 Mean : 3.11 Mean : -0.17 Mean :0.2792
## 3rd Qu.: 0.300 3rd Qu.: 5.44 3rd Qu.: 1.72 3rd Qu.:0.4653
## Max. : 7.075 Max. :18.59 Max. : 62.44 Max. :1.0000
## adj.P.Val B
## Min. :0.0170 Min. :-8.29
## 1st Qu.:0.0767 1st Qu.:-6.58
## Median :0.2479 Median :-5.50
## Mean :0.3686 Mean :-4.87
## 3rd Qu.:0.6204 3rd Qu.:-3.50
## Max. :1.0000 Max. : 4.83
scatter_wt_mut <- extract_coefficient_scatter(limma_comparison, type="limma", x="wt.30", y="wt.120", gvis_filename=NULL)
## This can do comparisons among the following columns in the pairwise result:
## wt.120, wt.30
## Actually comparing wt.30 and wt.120.
scatter_wt_mut$scatter
scatter_wt_mut$both_histogram$plot + ggplot2::scale_y_continuous(limits=c(0,0.20))
## Warning: Removed 4 rows containing missing values (geom_bar).
ma_wt_mut <- extract_de_ma(limma_comparison, type="limma")
ma_wt_mut$plot
deseq_comparison <- sm(deseq2_pairwise(fun_data))
summary(deseq_comparison$all_tables$wt.30_vs_wt.120)
## baseMean logFC lfcSE stat
## Min. : 0 Min. :-3.9 Min. :0.1 Min. :-20.9
## 1st Qu.: 28 1st Qu.:-0.3 1st Qu.:0.2 1st Qu.: -1.2
## Median : 192 Median : 0.0 Median :0.2 Median : 0.0
## Mean : 1703 Mean : 0.0 Mean :0.3 Mean : 0.2
## 3rd Qu.: 536 3rd Qu.: 0.3 3rd Qu.:0.3 3rd Qu.: 1.2
## Max. :4924000 Max. : 6.4 Max. :0.5 Max. : 30.4
## NA's :404 NA's :404 NA's :404
## P.Value adj.P.Val
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0206 1st Qu.:0.0717
## Median :0.2567 Median :0.4697
## Mean :0.3604 Mean :0.4797
## 3rd Qu.:0.6558 3rd Qu.:0.8612
## Max. :1.0000 Max. :1.0000
##
scatter_wt_mut <- extract_coefficient_scatter(deseq_comparison, type="deseq", x="wt.30", y="wt.120", gvis_filename=NULL)
## This can do comparisons among the following columns in the pairwise result:
## NA, wt.120, wt.30, r1, r2, r3
## Actually comparing wt.30 and wt.120.
scatter_wt_mut$scatter
ma_wt_mut <- extract_de_ma(deseq_comparison, type="deseq")
ma_wt_mut$plot
edger_comparison <- sm(edger_pairwise(fun_data, model_batch=TRUE))
scatter_wt_mut <- extract_coefficient_scatter(edger_comparison, type="edger", x="wt.30", y="wt.120", gvis_filename=NULL)
## This can do comparisons among the following columns in the pairwise result:
## wt.120, wt.30
## Actually comparing wt.30 and wt.120.
scatter_wt_mut$scatter
ma_wt_mut <- extract_de_ma(edger_comparison, type="edger")
ma_wt_mut$plot
basic_comparison <- sm(basic_pairwise(fun_data))
summary(basic_comparison$all_tables$wt.30_vs_wt.120)
## numerator_median denominator_median numerator_var denominator_var
## Min. :-2.73 Min. :-3.60 Length:5505 Length:5505
## 1st Qu.: 3.31 1st Qu.: 3.31 Class :character Class :character
## Median : 4.65 Median : 4.63 Mode :character Mode :character
## Mean : 4.71 Mean : 4.71
## 3rd Qu.: 5.94 3rd Qu.: 5.93
## Max. :18.61 Max. :18.61
## t p logFC adjp
## Min. :-49.10 Length:5505 Min. :-4.263 Length:5505
## 1st Qu.: -1.53 Class :character 1st Qu.:-0.406 Class :character
## Median : 0.39 Mode :character Median :-0.070 Mode :character
## Mean : 0.16 Mean : 0.008
## 3rd Qu.: 2.10 3rd Qu.: 0.297
## Max. : 50.21 Max. : 7.485
scatter_wt_mut <- extract_coefficient_scatter(basic_comparison, type="basic")
## This can do comparisons among the following columns in the pairwise result:
## wt.120, wt.30
## Actually comparing wt.120 and wt.30.
scatter_wt_mut$scatter
ma_wt_mut <- extract_de_ma(basic_comparison, type="basic")
ma_wt_mut$plot
all_comparisons <- sm(all_pairwise(fun_data, model_batch=TRUE))
all_combined <- sm(combine_de_tables(all_comparisons, excel=FALSE))
head(all_combined$data[[1]])
## name limma_logfc limma_adjp deseq_logfc deseq_adjp
## SPAC1002.01 SPAC1002.01 -0.99860 0.16930 -1.08000 0.36630
## SPAC1002.02 SPAC1002.02 0.03778 0.99460 -0.01485 0.98160
## SPAC1002.03c SPAC1002.03c -0.33910 0.02432 -0.22760 0.23270
## SPAC1002.04c SPAC1002.04c 0.31760 0.33060 0.33550 0.32470
## SPAC1002.05c SPAC1002.05c 0.75440 0.08050 0.74810 0.01187
## SPAC1002.06c SPAC1002.06c 0.69490 0.68500 0.50560 1.00000
## edger_logfc edger_adjp limma_ave limma_t limma_b limma_p
## SPAC1002.01 -1.05900 0.2201000 -0.1955 -2.8320 -4.0790 0.07147
## SPAC1002.02 -0.02342 1.0000000 2.8470 0.1354 -7.4050 0.90140
## SPAC1002.03c -0.23630 0.1598000 7.0770 -12.5400 -0.6495 0.00151
## SPAC1002.04c 0.32580 0.2151000 4.1960 1.7300 -6.5590 0.18830
## SPAC1002.05c 0.74120 0.0009489 3.9020 4.6960 -3.7270 0.02118
## SPAC1002.06c 0.69240 0.7328000 -1.9060 0.7146 -6.1430 0.52970
## deseq_basemean deseq_lfcse deseq_stat deseq_p edger_logcpm
## SPAC1002.01 11.150 0.8208 -1.31600 0.188200 0.06691
## SPAC1002.02 87.420 0.3316 -0.04479 0.964300 2.89400
## SPAC1002.03c 1621.000 0.1387 -1.64100 0.100800 7.09500
## SPAC1002.04c 222.200 0.2382 1.40900 0.159000 4.24700
## SPAC1002.05c 187.200 0.2463 3.03700 0.002391 3.99900
## SPAC1002.06c 4.176 1.4310 0.35340 0.723800 -0.89280
## edger_lr edger_p basic_nummed basic_denmed basic_numvar
## SPAC1002.01 2.745000 0.097570 0.000 0.000 0
## SPAC1002.02 0.007429 0.931300 3.100 2.774 3.603e-01
## SPAC1002.03c 3.399000 0.065220 6.909 7.248 2.955e-03
## SPAC1002.04c 2.794000 0.094620 4.407 4.195 5.418e-02
## SPAC1002.05c 14.270000 0.000158 4.272 3.625 1.293e-01
## SPAC1002.06c 0.370800 0.542600 0.000 0.000 0
## basic_denvar basic_logfc basic_t basic_p basic_adjp
## SPAC1002.01 0 0.0000 0.00000 0 0
## SPAC1002.02 2.890e-02 0.3260 -0.01124 9.919e-01 9.963e-01
## SPAC1002.03c 1.016e-03 -0.3390 9.25600 1.969e-03 3.718e-02
## SPAC1002.04c 1.441e-01 0.2125 -1.26900 2.862e-01 4.542e-01
## SPAC1002.05c 5.645e-02 0.6472 -2.95900 4.975e-02 1.630e-01
## SPAC1002.06c 0 0.0000 0.00000 0 0
## limma_adjp_fdr deseq_adjp_fdr edger_adjp_fdr basic_adjp_fdr
## SPAC1002.01 1.693e-01 4.186e-01 2.201e-01 0.000e+00
## SPAC1002.02 9.945e-01 1.000e+00 1.000e+00 9.954e-01
## SPAC1002.03c 2.432e-02 2.682e-01 1.598e-01 7.607e-03
## SPAC1002.04c 3.305e-01 3.722e-01 2.151e-01 4.027e-01
## SPAC1002.05c 8.050e-02 1.392e-02 9.489e-04 1.090e-01
## SPAC1002.06c 6.850e-01 9.279e-01 7.328e-01 0.000e+00
## fc_meta fc_var fc_varbymed p_meta p_var
## SPAC1002.01 -1.0520000 7.689e-04 -7.305e-04 1.191e-01 3.753e-03
## SPAC1002.02 0.0007106 3.191e-03 4.491e+00 9.323e-01 9.899e-04
## SPAC1002.03c -0.2681000 2.166e-03 -8.081e-03 5.584e-02 2.531e-03
## SPAC1002.04c 0.3262000 1.943e-04 5.956e-04 1.473e-01 2.297e-03
## SPAC1002.05c 0.7512000 1.320e-03 1.757e-03 7.910e-03 1.333e-04
## SPAC1002.06c 0.6331000 1.515e-02 2.394e-02 5.987e-01 1.178e-02
sig_genes <- sm(extract_significant_genes(all_combined, excel=FALSE))
head(sig_genes$limma$ups[[1]])
## name limma_logfc limma_adjp deseq_logfc deseq_adjp
## SPBC2F12.09c SPBC2F12.09c 7.075 0.01847 7.212 5.244e-66
## SPAC22A12.17c SPAC22A12.17c 5.609 0.02447 5.855 3.969e-19
## SPAPB1A11.02 SPAPB1A11.02 5.606 0.01696 6.739 1.893e-06
## SPCPB16A4.07 SPCPB16A4.07 5.576 0.01696 5.693 8.186e-199
## SPNCRNA.1611 SPNCRNA.1611 5.410 0.01696 5.612 5.040e-16
## SPBC660.05 SPBC660.05 5.229 0.02795 5.403 3.101e-14
## edger_logfc edger_adjp limma_ave limma_t limma_b limma_p
## SPBC2F12.09c 7.170 1.264e-180 2.5920 19.36 0.8017 0.0004519
## SPAC22A12.17c 5.822 3.155e-57 6.4820 12.49 -0.3953 0.0015260
## SPAPB1A11.02 6.483 1.257e-14 -1.1920 27.66 0.2698 0.0001667
## SPCPB16A4.07 5.684 4.519e-138 6.5360 28.74 2.2890 0.0001498
## SPNCRNA.1611 5.529 1.789e-33 0.5594 39.34 1.0980 0.0000622
## SPBC660.05 5.310 9.868e-74 3.6400 10.56 -0.5550 0.0024270
## deseq_basemean deseq_lfcse deseq_stat deseq_p
## SPBC2F12.09c 443.50 0.4123 17.490 1.662e-68
## SPAC22A12.17c 4289.00 0.6255 9.360 7.945e-21
## SPAPB1A11.02 21.20 1.2810 5.259 1.447e-07
## SPCPB16A4.07 4157.00 0.1875 30.370 1.366e-202
## SPNCRNA.1611 58.57 0.6571 8.541 1.329e-17
## SPBC660.05 523.80 0.6724 8.035 9.365e-16
## edger_logcpm edger_lr edger_p basic_nummed basic_denmed
## SPBC2F12.09c 5.2210 839.00 1.796e-184 6.250 -1.235
## SPAC22A12.17c 8.4930 264.90 1.479e-59 9.396 4.087
## SPAPB1A11.02 0.9166 65.83 4.910e-16 1.491 -3.604
## SPCPB16A4.07 8.4490 641.90 1.284e-141 9.416 3.641
## SPNCRNA.1611 2.3170 154.00 2.363e-35 3.380 -1.604
## SPBC660.05 5.4560 341.60 2.804e-76 6.156 1.381
## basic_numvar basic_denvar basic_logfc basic_t basic_p
## SPBC2F12.09c 2.211e-02 5.043e-01 7.485 -16.760 2.432e-03
## SPAC22A12.17c 2.236e-02 9.694e-01 5.309 -10.080 8.325e-03
## SPAPB1A11.02 6.869e-01 4.981e-01 5.095 -7.708 1.687e-03
## SPCPB16A4.07 2.022e-02 2.894e-01 5.776 -17.480 1.780e-03
## SPNCRNA.1611 1.174e-01 1.141e-01 4.984 -18.270 5.286e-05
## SPBC660.05 2.068e-01 5.143e-01 4.775 -10.840 9.581e-04
## basic_adjp limma_adjp_fdr deseq_adjp_fdr edger_adjp_fdr
## SPBC2F12.09c 3.928e-02 1.847e-02 6.157e-66 1.264e-180
## SPAC22A12.17c 6.437e-02 2.447e-02 4.660e-19 3.155e-57
## SPAPB1A11.02 3.452e-02 1.696e-02 2.224e-06 1.257e-14
## SPCPB16A4.07 3.524e-02 1.696e-02 9.615e-199 4.519e-138
## SPNCRNA.1611 1.455e-02 1.696e-02 5.921e-16 1.789e-33
## SPBC660.05 2.791e-02 2.795e-02 3.642e-14 9.869e-74
## basic_adjp_fdr fc_meta fc_var fc_varbymed p_meta
## SPBC2F12.09c 9.147e-03 7.152 0.000e+00 0.000e+00 1.506e-04
## SPAC22A12.17c 2.609e-02 5.916 1.123e-01 1.898e-02 5.087e-04
## SPAPB1A11.02 6.586e-03 6.137 5.880e-02 9.581e-03 5.561e-05
## SPCPB16A4.07 6.915e-03 5.632 2.810e-02 4.989e-03 4.993e-05
## SPNCRNA.1611 2.394e-04 5.521 2.657e-02 4.813e-03 2.073e-05
## SPBC660.05 3.914e-03 5.347 2.521e-02 4.714e-03 8.090e-04
## p_var
## SPBC2F12.09c 6.807e-08
## SPAC22A12.17c 7.762e-07
## SPAPB1A11.02 9.255e-09
## SPCPB16A4.07 7.480e-09
## SPNCRNA.1611 1.290e-09
## SPBC660.05 1.963e-06
## Here we see that edger and deseq agree the least:
all_comparisons$comparison$comp
## wt.30_vs_wt.120
## le 0.9601
## ld 0.9808
## ed 0.9814
## lb 0.9779
## eb 0.9782
## db 0.9777
## And here we can look at the set of 'significant' genes according to various tools:
yeast_sig <- extract_significant_genes(all_combined, excel=FALSE)
## Writing excel data sheet 0/1: wt.30_vs_wt.120
## Assuming the fold changes are on the log scale and so taking >< 0
## After (adj)p filter, the up genes table has 633 genes.
## After (adj)p filter, the down genes table has 629 genes.
## Assuming the fold changes are on the log scale and so taking -1.0 * fc
## After fold change filter, the up genes table has 325 genes.
## After fold change filter, the down genes table has 201 genes.
## Still not printing excel sheets for the significant genes.
## Writing excel data sheet 1/1: wt.30_vs_wt.120
## Assuming the fold changes are on the log scale and so taking >< 0
## After (adj)p filter, the up genes table has 1107 genes.
## After (adj)p filter, the down genes table has 1052 genes.
## Assuming the fold changes are on the log scale and so taking -1.0 * fc
## After fold change filter, the up genes table has 447 genes.
## After fold change filter, the down genes table has 278 genes.
## Still not printing excel sheets for the significant genes.
## Writing excel data sheet 2/1: wt.30_vs_wt.120
## Assuming the fold changes are on the log scale and so taking >< 0
## After (adj)p filter, the up genes table has 904 genes.
## After (adj)p filter, the down genes table has 733 genes.
## Assuming the fold changes are on the log scale and so taking -1.0 * fc
## After fold change filter, the up genes table has 403 genes.
## After fold change filter, the down genes table has 219 genes.
## Still not printing excel sheets for the significant genes.
## Writing excel data sheet 3/1: wt.30_vs_wt.120
## Assuming the fold changes are on the log scale and so taking >< 0
## After (adj)p filter, the up genes table has 277 genes.
## After (adj)p filter, the down genes table has 237 genes.
## Assuming the fold changes are on the log scale and so taking -1.0 * fc
## After fold change filter, the up genes table has 185 genes.
## After fold change filter, the down genes table has 105 genes.
## Still not printing excel sheets for the significant genes.
yeast_barplots <- sm(significant_barplots(combined=all_combined))
yeast_barplots$limma
yeast_barplots$edger
yeast_barplots$deseq
Since I didn’t acquire this data in a ‘normal’ way, I am going to post-generate a gff file which may be used by clusterprofiler, topgo, and gostats.
Therefore, I am going to make use of TxDb to make the requisite gff file.
limma_results <- limma_comparison$all_tables
## The set of comparisons performed
names(limma_results)
## [1] "wt.30_vs_wt.120"
table <- limma_results$wt.30_vs_wt.120
dim(table)
## [1] 7039 6
gene_names <- rownames(table)
updown_genes <- get_sig_genes(table, p=0.05, fc=0.4, p_column="P.Value")
## Assuming the fold changes are on the log scale and so taking >< 0
## After (adj)p filter, the up genes table has 1190 genes.
## After (adj)p filter, the down genes table has 1424 genes.
## Assuming the fold changes are on the log scale and so taking -1.0 * fc
## After fold change filter, the up genes table has 962 genes.
## After fold change filter, the down genes table has 1069 genes.
tt <- require.auto("GenomicFeatures")
tt <- require.auto("biomaRt")
ensembl_pombe <- biomaRt::useMart("fungal_mart", dataset="spombe_eg_gene", host="fungi.ensembl.org")
pombe_filters <- biomaRt::listFilters(ensembl_pombe)
head(pombe_filters, n=20) ## 11 looks to be my guy
## name description
## 1 chromosome_name Chromosome/scaffold name
## 2 start Start
## 3 end End
## 4 strand Strand
## 5 chromosomal_region e.g. 1:100:10000:-1, 1:100000:200000:1
## 6 with_chembl With ChEMBL ID(s)
## 7 with_ec_number With Enzyme EC Number ID(s)
## 8 with_embl With European Nucleotide Archive ID(s)
## 9 with_fypo With Fission Yeast Phenotype Ontology ID(s)
## 10 with_go With GO ID(s)
## 11 with_goslim_goa With GOSlim GOA ID(s)
## 12 with_protein_id With INSDC protein ID ID(s)
## 13 with_kegg With KEGG ID(s)
## 14 with_kegg_enzyme With KEGG Pathway and Enzyme ID(s)
## 15 with_merops With MEROPS - the Peptidase Database ID(s)
## 16 with_entrezgene With NCBI gene ID(s)
## 17 with_pombase_ortholog With Orthologous Gene ID(s)
## 18 with_pdb With PDB ID(s)
## 19 with_pombase_transcript With PomBase ID(s)
## 20 with_pombase_translation With PomBase (peptide) ID(s)
possible_pombe_attributes <- biomaRt::listAttributes(ensembl_pombe)
##pombe_goids <- biomaRt::getBM(attributes=c('pombase_gene_name', 'go_accession'), filters="biotype",
## values=gene_names, mart=ensembl_pombe)
pombe_goids <- biomaRt::getBM(attributes=c('pombase_transcript', 'go_id'),
values=gene_names, mart=ensembl_pombe)
colnames(pombe_goids) <- c("ID","GO")
pombe_goids_simple <- load_biomart_go(species="spombe", overwrite=TRUE,
dl_rows=c("pombase_transcript", "go_id"),
host="fungi.ensembl.org")
## Unable to perform useMart, perhaps the host/mart is incorrect: fungi.ensembl.org ENSEMBL_MART_ENSEMBL.
## The available marts are:
## fungal_martfungal_variations
## Trying the first one.
## Unable to perform useDataset, perhaps the given dataset is incorrect: spombe_gene_ensembl.
## Trying instead to use the dataset: spombe_eg_gene
## That seems to have worked, extracting the resulting annotations.
## Finished downloading ensembl go annotations, saving to spombe_go_annotations.rda.
## Saving ontologies to spombe_go_annotations.rda.
## Finished save().
head(pombe_goids_simple)
## ID GO
## 1 SPRRNA.50.1
## 2 SPNCRNA.1095.1
## 3 SPAC212.11.1 GO:0000784
## 4 SPAC212.11.1 GO:0005634
## 5 SPAC212.11.1 GO:0000166
## 6 SPAC212.11.1 GO:0005524
head(pombe_goids)
## ID GO
## 1 SPRRNA.50.1
## 2 SPNCRNA.1095.1
## 3 SPAC212.11.1 GO:0000784
## 4 SPAC212.11.1 GO:0005634
## 5 SPAC212.11.1 GO:0000166
## 6 SPAC212.11.1 GO:0005524
## This used to work, but does so no longer and I do not know why.
pombe <- sm(GenomicFeatures::makeTxDbFromBiomart(biomart="fungal_mart",
dataset="spombe_eg_gene",
host="fungi.ensembl.org"))
## Error in function (type, msg, asError = TRUE) : Server denied you to change to the given directory
## This was found at the bottom of: https://www.biostars.org/p/232005/
link <- "ftp://ftp.ensemblgenomes.org/pub/release-34/fungi/gff3/schizosaccharomyces_pombe/Schizosaccharomyces_pombe.ASM294v2.34.gff3.gz"
pombe <- GenomicFeatures::makeTxDbFromGFF(link, format="gff3", organism="Schizosaccharomyces pombe",
taxonomyId="4896")
## Import genomic features from the file as a GRanges object ...
## OK
## Prepare the 'metadata' data frame ...
## OK
## Make the TxDb object ...
## OK
pombe_transcripts <- as.data.frame(GenomicFeatures::transcriptsBy(pombe))
lengths <- pombe_transcripts[, c("group_name","width")]
colnames(lengths) <- c("ID","width")
## Something useful I didn't notice before:
## makeTranscriptDbFromGFF() ## From GenomicFeatures, much like my own gff2df()
gff_from_txdb <- GenomicFeatures::asGFF(pombe)
## why is GeneID: getting prefixed to the IDs!?
gff_from_txdb$ID <- gsub(x=gff_from_txdb$ID, pattern="GeneID:", replacement="")
written_gff <- rtracklayer::export.gff3(gff_from_txdb, con="pombe.gff")
summary(updown_genes)
## Length Class Mode
## up_genes 6 data.frame list
## down_genes 6 data.frame list
test_genes <- updown_genes$down_genes
rownames(test_genes) <- paste0(rownames(test_genes), ".1")
lengths$ID <- paste0(lengths$ID, ".1")
funkytown <- sm(simple_goseq(sig_genes=test_genes, go_db=pombe_goids, length_db=lengths))
head(funkytown$alldata)
## category over_represented_pvalue under_represented_pvalue
## 329 GO:0005634 3.364e-44 1
## 338 GO:0005730 1.046e-34 1
## 1182 GO:0042254 1.413e-30 1
## 122 GO:0003674 9.794e-29 1
## 340 GO:0005737 1.157e-24 1
## 372 GO:0005829 1.352e-20 1
## numDEInCat numInCat term ontology qvalue
## 329 100 576 nucleus CC 5.254e-41
## 338 35 61 nucleolus CC 8.166e-32
## 1182 25 31 ribosome biogenesis BP 7.356e-28
## 122 81 576 molecular_function MF 3.825e-26
## 340 77 574 cytoplasm CC 3.614e-22
## 372 70 557 cytosol CC 3.521e-18
funkytown$pvalue_plots$mfp_plot
test_genes <- updown_genes$up_genes
rownames(test_genes) <- paste0(rownames(test_genes), ".1")
funkytown <- sm(simple_goseq(sig_genes=test_genes, go_db=pombe_goids, length_db=lengths))
head(funkytown$alldata)
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 630 GO:0008150 8.921e-75 1 153
## 372 GO:0005829 1.015e-51 1 126
## 122 GO:0003674 1.827e-51 1 126
## 340 GO:0005737 7.741e-50 1 126
## 329 GO:0005634 1.742e-41 1 116
## 839 GO:0016020 4.310e-39 1 96
## numInCat term ontology qvalue
## 630 608 biological_process BP 1.393e-71
## 372 557 cytosol CC 7.930e-49
## 122 576 molecular_function MF 9.514e-49
## 340 574 cytoplasm CC 3.023e-47
## 329 576 nucleus CC 5.443e-39
## 839 413 membrane CC 1.122e-36
funkytown$pvalue_plots$bpp_plot
pander::pander(sessionInfo())
R version 3.4.1 (2017-06-30)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=en_US.utf8, LC_NUMERIC=C, LC_TIME=en_US.utf8, LC_COLLATE=en_US.utf8, LC_MONETARY=en_US.utf8, LC_MESSAGES=en_US.utf8, LC_PAPER=en_US.utf8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_US.utf8 and LC_IDENTIFICATION=C
attached base packages: parallel, stats4, methods, stats, graphics, grDevices, utils, datasets and base
other attached packages: GO.db(v.3.4.1), AnnotationDbi(v.1.38.2), fission(v.0.110.0), SummarizedExperiment(v.1.6.3), DelayedArray(v.0.2.7), matrixStats(v.0.52.2), Biobase(v.2.36.2), GenomicRanges(v.1.28.4), GenomeInfoDb(v.1.12.2), IRanges(v.2.10.2), S4Vectors(v.0.14.3), BiocGenerics(v.0.22.0), ruv(v.0.9.6) and hpgltools(v.2017.01)
loaded via a namespace (and not attached): minqa(v.1.2.4), Rtsne(v.0.13), colorspace(v.1.3-2), colorRamps(v.2.3), rprojroot(v.1.2), qvalue(v.2.8.0), htmlTable(v.1.9), corpcor(v.1.6.9), XVector(v.0.16.0), base64enc(v.0.1-3), ggrepel(v.0.6.5), bit64(v.0.9-7), codetools(v.0.2-15), splines(v.3.4.1), doParallel(v.1.0.10), DESeq(v.1.28.0), robustbase(v.0.92-7), geneplotter(v.1.54.0), knitr(v.1.16), ade4(v.1.7-6), Formula(v.1.2-2), nloptr(v.1.0.4), Rsamtools(v.1.28.0), pbkrtest(v.0.4-7), annotate(v.1.54.0), cluster(v.2.0.6), geneLenDataBase(v.1.12.0), compiler(v.3.4.1), backports(v.1.1.0), Matrix(v.1.2-10), lazyeval(v.0.2.0), limma(v.3.32.5), acepack(v.1.4.1), htmltools(v.0.3.6), tools(v.3.4.1), gtable(v.0.2.0), GenomeInfoDbData(v.0.99.0), reshape2(v.1.4.2), Rcpp(v.0.12.12), Biostrings(v.2.44.2), gdata(v.2.18.0), preprocessCore(v.1.38.1), nlme(v.3.1-131), rtracklayer(v.1.36.4), iterators(v.1.0.8), stringr(v.1.2.0), openxlsx(v.4.0.17), testthat(v.1.0.2), lme4(v.1.1-13), gtools(v.3.5.0), devtools(v.1.13.3), XML(v.3.98-1.9), edgeR(v.3.18.1), DEoptimR(v.1.0-8), MASS(v.7.3-47), zlibbioc(v.1.22.0), scales(v.0.4.1), BiocInstaller(v.1.26.0), RColorBrewer(v.1.1-2), yaml(v.2.1.14), goseq(v.1.28.0), memoise(v.1.1.0), gridExtra(v.2.2.1), ggplot2(v.2.2.1), pander(v.0.6.1), biomaRt(v.2.32.1), rpart(v.4.1-11), latticeExtra(v.0.6-28), stringi(v.1.1.5), RSQLite(v.2.0), highr(v.0.6), genefilter(v.1.58.1), foreach(v.1.4.3), RMySQL(v.0.10.12), checkmate(v.1.8.3), GenomicFeatures(v.1.28.4), caTools(v.1.17.1), BiocParallel(v.1.10.1), pkgconfig(v.2.0.1), rlang(v.0.1.1), bitops(v.1.0-6), evaluate(v.0.10.1), lattice(v.0.20-35), GenomicAlignments(v.1.12.1), htmlwidgets(v.0.9), labeling(v.0.3), bit(v.1.1-12), plyr(v.1.8.4), magrittr(v.1.5), variancePartition(v.1.6.0), DESeq2(v.1.16.1), R6(v.2.2.2), gplots(v.3.0.1), Hmisc(v.4.0-3), DBI(v.0.7), foreign(v.0.8-69), withr(v.2.0.0), mgcv(v.1.8-18), survival(v.2.41-3), RCurl(v.1.95-4.8), nnet(v.7.3-12), tibble(v.1.3.3), crayon(v.1.3.2), KernSmooth(v.2.23-15), rmarkdown(v.1.6), locfit(v.1.5-9.1), grid(v.3.4.1), sva(v.3.24.4), data.table(v.1.10.4), blob(v.1.1.0), digest(v.0.6.12), xtable(v.1.8-2), genoPlotR(v.0.8.6), munsell(v.0.4.3) and BiasedUrn(v.1.07)